In [None]:
%matplotlib widget

import ipywidgets
from ipywidgets import FloatSlider, IntSlider, Layout, interactive
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
from matplotlib import cm

# Datos de entrenamiento

$$\mathcal{D}_\text{train}$$

| $x_1$ | $x_2$ | $y$ |
|-------|-------|-----|
| 0     | 2     | +1  |
| -2    | 0     | +1  |
| 1     | -1    | -1  |

In [None]:
# Training set: each example is ((x1, x2), y), where y is the label +1 or -1.
Dtrain = (
    (( 0,  2), +1),
    ((-2,  0), +1),
    (( 1, -1), -1),
)

In [None]:
# Close the current pyplot figure (if any) before this cell creates a new one.
plt.close()

# Axis limits used for the scatter plot of the training data.
xmin, xmax = -3, 3
ymin, ymax = -3, 3

def plot_Dtrain(ax):
    """
    Draw the training examples in the global Dtrain as a scatter plot on ax.

    Examples with a positive label are drawn in orange, all others in green.
    Returns the result of ax.scatter.
    """
    points = [x for x, _ in Dtrain]
    colors = ["orange" if y > 0 else "green" for _, y in Dtrain]
    return ax.scatter(
        [x1 for x1, _ in points],
        [x2 for _, x2 in points],
        c = colors,
    )

# Turn off pyplot's interactive mode; the canvas is shown explicitly with display() below.
plt.ioff()

# plt.axes() adds the axes to the current figure, i.e. the one created just above,
# so these two statements must stay in this order.
fig = plt.figure();
ax = plt.axes();
plot_Dtrain(ax)
ax.set_xlabel("$x_1$", size=20)
ax.set_xlim((xmin, xmax))
ax.set_ylabel("$x_2$", size=20)
ax.set_ylim((ymin, ymax))
# Hide the header of the widget canvas.
fig.canvas.header_visible = False

display(fig.canvas);

# Clase de hipótesis

$$\begin{aligned}
\mathcal{F} &= \left\{ f_{\mathbf{w}} : \mathbf{w} \in \mathbb{R}^2 \right\} \\
f_\mathbf{w}(x) &= \mathrm{sign}(\mathbf{w}\cdot\phi(x)) \\
\phi(x) &= x
\end{aligned}$$

In [None]:
def features(x):
    """
    Feature map phi: return the feature vector of the input x.

    This model uses the identity map, so the input itself is returned unchanged.
    """
    phi = x
    return phi

In [None]:
def predict(w, x):
    """
    Predict the output for the input x with the predictor given by the weight
    vector w.

    The prediction is the sign of the dot product between w and features(x).
    """
    score = dotprod(w, features(x))
    return sign(score)

In [None]:
def dotprod(v1, v2):
    """
    Compute the dot product of the vectors v1 and v2.

    Components are paired with zip, so pairing stops at the shorter vector.
    """
    total = 0
    for a, b in zip(v1, v2):
        total = total + a * b
    return total

In [None]:
def sign(z):
    """
    Return the sign of z: +1 if z is positive, -1 if z is negative, 0 otherwise.
    """
    if z < 0:
        return -1
    return +1 if z > 0 else 0

In [None]:
# Close the current pyplot figure (if any) before this cell creates a new one.
plt.close()

# Axis limits for the two feature axes.
x1min, x1max = -3, 3
x2min, x2max = -3, 3
# Scale factor applied to the two directions perpendicular to w when they are
# drawn as arrows by plot_model / update_plot.
s = 666

def plot_model(ax, w):
    """
    Draw the linear predictor with weight vector w on ax.

    Three blue arrows starting at the origin are added: the weight vector itself
    (with a visible head) and the two opposite directions perpendicular to w,
    each scaled by the global factor s.  Returns the three arrow artists as a
    list, in that order.
    """
    w1, w2 = w[0], w[1]
    weight_arrow = ax.arrow(0, 0, w1, w2, head_width = 0.1, color = "blue")
    # (-w2, w1) and (w2, -w1) are w rotated a quarter turn in each direction.
    side_a = ax.arrow(0, 0, s*(-w2), s*(+w1), color = "blue")
    side_b = ax.arrow(0, 0, s*w2, s*(-w1), color = "blue")
    return [weight_arrow, side_a, side_b]

# Turn off pyplot's interactive mode; the canvas is shown explicitly with display().
plt.ioff()

# Initial value, minimum, maximum and step for each weight slider.
w1_init, w1_min, w1_max, w1_step = 0.5, -10.0, 10.0, 0.1
w2_init, w2_min, w2_max, w2_step = 1.0, -10.0, 10.0, 0.1

# Initial weight vector drawn on the plot.
w = [w1_init, w2_init]

def model_label(w1, w2):
    """
    Build the mathtext label describing the predictor with weights (w1, w2).

    Each weight is formatted with an explicit sign and two decimals.
    """
    p1 = f"{w1:+.2f}"
    p2 = f"{w2:+.2f}"
    # Backslashes are doubled so that "\c" and "\p" are not parsed as (invalid)
    # string escape sequences; the resulting text is unchanged.
    return "$f_{\\mathbf{w}}([x_1,x_2]) = sign([" + p1 + "," + p2 + "]\\cdot\\phi(x))$"

# Build the figure: training points, the initial model and a text label for it.
fig = plt.figure(figsize=(6,6))
# plt.axes() adds the axes to the current figure, i.e. the one created just above.
ax = plt.axes()
plot_Dtrain(ax)
# Keep the arrow artists so update_plot can move them.
lines = plot_model(ax, w)
# The label shows the weights that are actually drawn (w) and is anchored to the
# top-left corner using the x1 (horizontal) and x2 (vertical) limits.
model = ax.text(
    x1min + 0.1,
    x2max - 0.1,
    model_label(w[0], w[1]),
    size = 14,
    va = "top",
)
ax.set_xlabel("$x_1$", size=20)
ax.set_xlim((x1min, x1max))
ax.set_ylabel("$x_2$", size=20)
ax.set_ylim((x2min, x2max))
# Hide the header of the widget canvas.
fig.canvas.header_visible = False

def update_plot(w1, w2):
    """
    Slider callback: redraw the model for the weights (w1, w2).

    Moves the three arrows stored in the global list lines, refreshes the global
    model text label and redraws the figure canvas.
    """
    # The two opposite directions perpendicular to (w1, w2).
    v1 = -w2
    v2 = +w1
    u1 = w2
    u2 = -w1
    lines[0].set_data(x = 0, y = 0, dx = w1, dy = w2)
    lines[1].set_data(x = 0, y = 0, dx = s*v1, dy = s*v2)
    lines[2].set_data(x = 0, y = 0, dx = s*u1, dy = s*u2)
    # Keep the displayed formula in sync with the weights being drawn.
    model.set_text(model_label(w1, w2))
    fig.canvas.draw()
    fig.canvas.flush_events()

# One slider per weight; interactive() wires them to update_plot(w1, w2).
widget = interactive(
    update_plot,
    w1 = FloatSlider(
        orientation = "horizontal",
        description = "w1",
              value = w1_init,
                min = w1_min,
                max = w1_max,
               step = w1_step,
             layout = Layout(width='90%')
    ),
    w2 = FloatSlider(
        orientation = "horizontal",
        description = "w2",
              value = w2_init,
                min = w2_min,
                max = w2_max,
               step = w2_step,
             layout = Layout(width='90%')
    ),
)

# Show the sliders first and the figure canvas below them.
display(widget)
display(fig.canvas)

# Función de pérdida

Pérdida cero-uno

$$\begin{aligned}
\text{Loss}(x, y, \mathbf{w}) &= \mathbf{1}[f_\mathbf{w}(x) \not= y] \\
                              &= \mathbf{1}[\underbrace{(\mathbf{w}\cdot\phi(x))y}_{\text{margen}} \leq 0] \\
\text{TrainLoss}(\mathbf{w}) &= \frac{1}{|\mathcal{D}_\text{train}|} \sum_{(x, y)\in\mathcal{D}_\text{train}} \text{Loss}(x, y, \mathbf{w})
\end{aligned}$$

In [None]:
def loss(x, y, w):
    """
    Zero-one loss for the input x and output y with respect to the predictor
    given by the weight vector w.

    Returns 1 when the margin is less than or equal to zero, and 0 otherwise.
    """
    if margin(x, y, w) <= 0:
        return 1
    return 0

In [None]:
def margin(x, y, w):
    """
    Margin of the example (x, y) under the predictor given by the weight vector
    w: the score dotprod(w, features(x)) multiplied by the output y.
    """
    score = dotprod(w, features(x))
    return score * y

In [None]:
def train_loss(Dtrain, loss, w):
    """
    Training loss of the predictor given by the weight vector w: the average of
    loss(x, y, w) over the examples (x, y) in Dtrain.
    """
    n_examples = len(Dtrain)
    accumulated = 0
    for x, y in Dtrain:
        accumulated = accumulated + loss(x, y, w)
    return accumulated / n_examples

In [None]:
# Close the current pyplot figure (if any) before this cell creates a new one.
plt.close()

# Axis limits for the two feature axes.
x1min, x1max = -3, 3
x2min, x2max = -3, 3
# Scale factor applied to the two directions perpendicular to w when they are
# drawn as arrows by plot_model / update_plot.
s = 666

# Turn off pyplot's interactive mode; the canvas is shown explicitly with display().
plt.ioff()

# Initial value, minimum, maximum and step for each weight slider.
w1_init, w1_min, w1_max, w1_step = 0.5, -10.0, 10.0, 0.1
w2_init, w2_min, w2_max, w2_step = 1.0, -10.0, 10.0, 0.1

# Initial weight vector drawn on the plot.
w = [w1_init, w2_init]

def train_loss_label(err):
    """
    Build the mathtext label showing the training loss err with four decimals.
    """
    return "$\\mathrm{TrainLoss}(\\mathbf{w}) = " + f"{err:.4f}" + "$"

# Build the figure: training points, the initial model and two text labels.
fig = plt.figure(figsize=(6,6))
# plt.axes() adds the axes to the current figure, i.e. the one created just above.
ax = plt.axes()
# Keep the scatter artist so update_plot can restyle the point edges.
dots = plot_Dtrain(ax)
# Keep the arrow artists so update_plot can move them.
lines = plot_model(ax, w)
# Model formula, labelled with the weights that are actually drawn (w).
model = ax.text(
    x1min + 0.1,
    x2max - 0.1,
    model_label(w[0], w[1]),
    size = 14,
    va = "top",
)
# Training-loss readout, positioned with this cell's own axis limits.
error = ax.text(
    x1min + 0.1,
    x2max - 0.6,
    train_loss_label(train_loss(Dtrain, loss, w)),
    size = 18,
    va = "top",
)
ax.set_xlabel("$x_1$", size=20)
ax.set_xlim((x1min, x1max))
ax.set_ylabel("$x_2$", size=20)
ax.set_ylim((x2min, x2max))
# Hide the header of the widget canvas.
fig.canvas.header_visible = False

def update_plot(w1, w2):
    """
    Slider callback: redraw the model and the loss readout for weights (w1, w2).

    Moves the three arrows in the global list lines, outlines in red every
    training point whose prediction differs from its label (outline width is
    -10 times the margin), and refreshes the model and training-loss labels.
    """
    # The two opposite directions perpendicular to (w1, w2).
    v1 = -w2
    v2 = +w1
    u1 = w2
    u2 = -w1
    lines[0].set_data(x = 0, y = 0, dx = w1, dy = w2)
    lines[1].set_data(x = 0, y = 0, dx = s*v1, dy = s*v2)
    lines[2].set_data(x = 0, y = 0, dx = s*u1, dy = s*u2)
    ecols = []
    ewidths = []
    for x, y in Dtrain:
        yest = predict([w1, w2], x)
        marg = margin(x, y, [w1, w2])
        ecols.append("red" if yest != y else "none")
        # A wrong prediction has margin <= 0, so -10*marg is a non-negative width.
        ewidths.append(-10*marg if yest != y else 0.0)
    dots.set_edgecolors(ecols)
    dots.set_linewidth(ewidths)
    # Keep the displayed formula in sync with the weights being drawn.
    model.set_text(model_label(w1, w2))
    tloss = train_loss(Dtrain, loss, [w1, w2])
    error.set_text(train_loss_label(tloss))
    fig.canvas.draw()
    fig.canvas.flush_events()

# One slider per weight; interactive() wires them to update_plot(w1, w2).
widget = interactive(
    update_plot,
    w1 = FloatSlider(
        orientation = "horizontal",
        description = "w1",
              value = w1_init,
                min = w1_min,
                max = w1_max,
               step = w1_step,
             layout = Layout(width='90%')
    ),
    w2 = FloatSlider(
        orientation = "horizontal",
        description = "w2",
              value = w2_init,
                min = w2_min,
                max = w2_max,
               step = w2_step,
             layout = Layout(width='90%')
    ),
)

# Show the sliders first and the figure canvas below them.
display(widget)
display(fig.canvas)

In [None]:
# Close the current pyplot figure (if any) before this cell creates a new one.
plt.close()

# Turn off pyplot's interactive mode; the canvas is shown explicitly with display().
plt.ioff()

# Single 3D axes that will hold the training-loss surface.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')

def train_loss_w1w2(w1, w2):
    """
    Training loss as a function of the two scalar weights w1 and w2, evaluated
    on the global Dtrain with the global loss function.
    """
    weights = [w1, w2]
    return train_loss(Dtrain, loss, weights)

# Element-wise version of train_loss_w1w2 so it can be evaluated on whole grids.
train_loss_vec = np.vectorize(train_loss_w1w2)

# Initial value, minimum, maximum and step for each weight slider.
w1_init, w1_min, w1_max, w1_step = 0.5, -10.0, 10.0, 0.1
w2_init, w2_min, w2_max, w2_step = 0.5, -10.0, 10.0, 0.1

# 100 x 100 grid of weight values covering the slider ranges.
w1 = np.linspace(w1_min, w1_max, 100)
w2 = np.linspace(w2_min, w2_max, 100)
X, Y = np.meshgrid(w1, w2)
# Training loss at every grid point.
Z = train_loss_vec(X, Y)

# Added to the z coordinate of the marker for the current weights.
offset = 0.0

ax.plot_surface(
    X, Y, Z,
    cmap = cm.magma_r,
    rcount = 100,
    ccount = 100,
)
# Single blue marker at the initial weights; ax.plot returns a list, so the
# marker artist itself is model3d[0].
model3d = ax.plot(
    [w1_init],
    [w2_init],
    [train_loss_w1w2(w1_init, w2_init) + offset],
    c = "blue",
    marker = "o",
    linestyle = "None",
)
ax.set_xlabel("$w_1$")
ax.set_ylabel("$w_2$")
ax.set_zlabel("$\\mathrm{TrainLoss}(\\mathbf{w})$")
# Lower z limit fixed at 0; the upper limit is left unset (None).
ax.set_zlim((0, None))
ax.set_title(train_loss_label(train_loss_w1w2(w1_init, w2_init)))

# Hide the header of the widget canvas.
fig.canvas.header_visible = False

def update_3dplot(w1, w2):
    """
    Slider callback: move the marker on the loss surface to the weights (w1, w2).

    The marker is placed at the training loss for those weights plus the global
    offset, the axes title is updated with that loss, and the canvas is redrawn.
    """
    current_loss = train_loss_w1w2(w1, w2)
    marker = model3d[0]
    marker.set_data_3d([w1], [w2], [current_loss + offset])
    ax.set_title(train_loss_label(current_loss))
    canvas = fig.canvas
    canvas.draw()
    canvas.flush_events()

# One slider per weight; interactive() wires them to update_3dplot(w1, w2).
widget = ipywidgets.interactive(
    update_3dplot,
    w1 = FloatSlider(
        orientation = "horizontal",
        description = "w1",
              value = w1_init,
                min = w1_min,
                max = w1_max,
               step = w1_step,
             layout = Layout(width='90%')
    ),
    w2 = FloatSlider(
        orientation = "horizontal",
        description = "w2",
              value = w2_init,
                min = w2_min,
                max = w2_max,
               step = w2_step,
             layout = Layout(width='90%')
    ),
)

# Show the sliders first and the figure canvas below them.
display(widget)

display(fig.canvas)

# Algoritmo de optimización

Queremos obtener $$\min_{\mathbf{w}}\text{TrainLoss}(\mathbf{w})$$

¿Cómo es el gradiente con respecto al vector de pesos para la pérdida de entrenamiento con la pérdida cero-uno?

In [None]:
def loss_01_for_margin(margin):
    """
    Zero-one loss as a function of the margin: 1 when the margin is less than
    or equal to zero, 0 otherwise.
    """
    if margin <= 0:
        return 1
    return 0

In [None]:
# Plot the zero-one loss as a function of the margin.
plt.close()
plt.ioff()
fig = plt.figure()
# plt.axes() adds the axes to the current figure, i.e. the one created just above.
ax = plt.axes()
# 100 evenly spaced margin values in [-3, 3].
margins = np.linspace(-3, 3, 100)
losses = [loss_01_for_margin(z) for z in margins]
ax.plot(margins, losses, label = "0 - 1")
ax.set_xlim((-3, 3))
ax.set_ylim((0, 4))
ax.set_xlabel("margin")
ax.set_ylabel("loss")
ax.legend()
# Hide the header of the widget canvas.
fig.canvas.header_visible = False
display(fig.canvas)

# Otras funciones de pérdida

## Pérdida de articulación

$$\begin{aligned}
\text{Loss}_{\text{hinge}}(x, y, \mathbf{w}) &= \max\left\{1-(\mathbf{w}\cdot\phi(x))y, 0\right\}
\end{aligned}$$

In [None]:
def loss_hinge(x, y, w):
    """
    Hinge loss for the input x and output y with respect to the predictor given
    by the weight vector w.

    Intended result: max(1 - (w . phi(x)) * y, 0).  This is still a placeholder
    and has no implementation yet.

    Raises:
        NotImplementedError: always, until the function is implemented.
    """
    # NotImplementedError is the standard signal for a stub; it is still an
    # Exception subclass, so existing `except Exception` handlers keep working.
    raise NotImplementedError("Not yet implemented")

In [None]:
def loss_hinge_for_margin(margin):
    """
    Hinge loss as a function of the margin: 1 - margin, but never below zero.
    """
    slack = 1 - margin
    if 0 > slack:
        return 0
    return slack

In [None]:
# Plot the zero-one and hinge losses together as functions of the margin.
plt.close()
plt.ioff()
fig = plt.figure()
# plt.axes() adds the axes to the current figure, i.e. the one created just above.
ax = plt.axes()
# 100 evenly spaced margin values in [-3, 3].
margins = np.linspace(-3, 3, 100)
losses_01 = [loss_01_for_margin(z) for z in margins]
losses_hinge = [loss_hinge_for_margin(z) for z in margins]
ax.plot(margins, losses_01, label = "0 - 1")
ax.plot(margins, losses_hinge, label = "hinge")
ax.set_xlim((-3, 3))
ax.set_ylim((0, 4))
ax.set_xlabel("margin")
ax.set_ylabel("loss")
ax.legend()
# Hide the header of the widget canvas.
fig.canvas.header_visible = False
display(fig.canvas)

## Pérdida logística

$$\begin{aligned}
\text{Loss}_{\text{logistic}}(x, y, \mathbf{w}) &= \log\left(1+e^{-(\mathbf{w}\cdot\phi(x))y}\right)
\end{aligned}$$

In [None]:
def loss_logistic(x, y, w):
    """
    Logistic loss for the input x and output y with respect to the predictor
    given by the weight vector w.

    Intended result: log(1 + exp(-(w . phi(x)) * y)).  This is still a
    placeholder and has no implementation yet.

    Raises:
        NotImplementedError: always, until the function is implemented.
    """
    # NotImplementedError is the standard signal for a stub; it is still an
    # Exception subclass, so existing `except Exception` handlers keep working.
    raise NotImplementedError("Not yet implemented")

In [None]:
def loss_logistic_for_margin(margin):
    """
    Logistic loss as a function of the margin: log(1 + exp(-margin)).

    Accepts a scalar or a NumPy array and returns the element-wise loss.
    """
    # log(1 + exp(-m)) == logaddexp(0, -m); logaddexp evaluates it without
    # overflowing exp() for very negative margins and without losing the small
    # term for very positive ones.
    return np.logaddexp(0, -margin)

In [None]:
# Plot the zero-one, hinge and logistic losses together as functions of the margin.
plt.close()
plt.ioff()
fig = plt.figure()
# plt.axes() adds the axes to the current figure, i.e. the one created just above.
ax = plt.axes()
# 100 evenly spaced margin values in [-3, 3].
margins = np.linspace(-3, 3, 100)
losses_01 = [loss_01_for_margin(z) for z in margins]
losses_hinge = [loss_hinge_for_margin(z) for z in margins]
losses_logistic = [loss_logistic_for_margin(z) for z in margins]
ax.plot(margins, losses_01, label = "0 - 1")
ax.plot(margins, losses_hinge, label = "hinge")
ax.plot(margins, losses_logistic, label = "logistic")
ax.set_xlim((-3, 3))
ax.set_ylim((0, 4))
ax.set_xlabel("margin")
ax.set_ylabel("loss")
ax.legend()
# Hide the header of the widget canvas.
fig.canvas.header_visible = False
display(fig.canvas)

## Gradiente de la pérdida de articulación

$$\begin{aligned}
\text{Loss}_{\text{hinge}}(x, y, \mathbf{w}) &= \max\left\{1-(\mathbf{w}\cdot\phi(x))y, 0\right\} \\
\nabla_{\mathbf{w}} \text{Loss}_{\text{hinge}}(x, y, \mathbf{w}) &= \begin{cases}
-\phi(x)y &\text{si } 1-(\mathbf{w}\cdot\phi(x))y > 0 \\
0 &\text{en otro caso}
\end{cases}
\end{aligned}$$

In [None]:
def train_loss_hinge_grad(Dtrain, loss, w):
    """
    Gradient, with respect to the weight vector w, of the training loss over
    Dtrain when the per-example loss is the hinge loss.

    Intended per-example gradient: -phi(x) * y when 1 - (w . phi(x)) * y > 0,
    and zero otherwise.  This is still a placeholder and has no implementation
    yet.

    Raises:
        NotImplementedError: always, until the function is implemented.
    """
    # NotImplementedError is the standard signal for a stub; it is still an
    # Exception subclass, so existing `except Exception` handlers keep working.
    raise NotImplementedError("Not yet implemented")

# Algoritmo de optimización

Descenso de gradiente

$$\mathbf{w} \gets \mathbf{w} - \eta \nabla_{\mathbf{w}} \mathrm{TrainLoss}(\mathbf{w})$$

In [None]:
def vecscale(k, v):
    """
    Return a new list holding the vector v scaled by the factor k.
    """
    scaled = []
    for component in v:
        scaled.append(k * component)
    return scaled

In [None]:
def vecsum(v1, v2):
    """
    Return a new list holding the component-wise sum of the vectors v1 and v2.

    Components are paired with zip, so the result is as long as the shorter input.
    """
    summed = []
    for a, b in zip(v1, v2):
        summed.append(a + b)
    return summed

In [None]:
def gd_update(Dtrain, loss, train_loss_grad, w, eta):
    """
    One gradient-descent weight update with step size eta.

    Returns w - eta * train_loss_grad(Dtrain, loss, w) as a new list.
    """
    gradient = train_loss_grad(Dtrain, loss, w)
    step = vecscale(-eta, gradient)
    return vecsum(w, step)

In [None]:
def gd_iter(Dtrain, loss, train_loss_grad, w_init, eta, T):
    """
    Iterative gradient descent with step size eta, run for T epochs starting
    from the weight vector w_init.

    Returns:
        (ws, tls): ws[t] is the weight vector after t updates (ws[0] is w_init)
        and tls[t] is the training loss at ws[t].  Both lists have T + 1 entries.
    """
    ws = [w_init]
    # The starting loss must be evaluated at w_init, not at whatever a
    # notebook-level variable named w happens to hold.
    tls = [train_loss(Dtrain, loss, w_init)]
    for _ in range(T):
        ws.append(gd_update(Dtrain, loss, train_loss_grad, ws[-1], eta))
        tls.append(train_loss(Dtrain, loss, ws[-1]))
    return ws, tls

In [None]:
# Close the current pyplot figure (if any) before this cell creates a new one.
plt.close()

# Axis limits for the two feature axes.
x1min, x1max = -3, 3
x2min, x2max = -3, 3
# Scale factor applied to the two directions perpendicular to w when they are
# drawn as arrows by plot_model / update_plot.
s = 666

# Turn off pyplot's interactive mode; the canvas is shown explicitly with display().
plt.ioff()

# NOTE(review): this rebinds the notebook-level name `loss`, which until here
# referred to the zero-one loss function.  Any earlier cell that is re-run after
# this one (for example through train_loss_w1w2) will pick up loss_hinge instead.
loss = loss_hinge
grad = train_loss_hinge_grad

# Initial value, minimum, maximum and step for each slider.
w1_init, w1_min, w1_max, w1_step = 0.5, -10.0, 10.0, 0.1
w2_init, w2_min, w2_max, w2_step = 1.0, -10.0, 10.0, 0.1
eta_init, eta_min, eta_max, eta_step = 0.1, 0.01, 0.2, 0.01
epoch_init, epoch_min, epoch_max, epoch_step = 0, 0, 99, 1

# Starting weight vector for gradient descent.
w_init = [w1_init, w2_init]

# Precompute the whole trajectory: 100 updates, i.e. 101 weight vectors and losses.
ws, tls = gd_iter(Dtrain, loss, grad, w_init, eta_init, 100)

def train_loss_label(err):
    """
    Build the mathtext label showing the training loss err with four decimals.
    """
    formatted = format(err, ".4f")
    return "".join(["$\\mathrm{TrainLoss}(\\mathbf{w}) = ", formatted, "$"])

# Build the figure: training points, the initial model and two text labels.
fig = plt.figure(figsize=(6,6))
# plt.axes() adds the axes to the current figure, i.e. the one created just above.
ax = plt.axes()
# Keep the scatter artist so update_plot can restyle the point edges.
dots = plot_Dtrain(ax)
# Keep the arrow artists so update_plot can move them.
lines = plot_model(ax, w_init)
# Model formula, labelled with the weights that are actually drawn (w_init).
model = ax.text(
    x1min + 0.1,
    x2max - 0.1,
    model_label(w_init[0], w_init[1]),
    size = 14,
    va = "top",
)
# Training-loss readout, positioned with this cell's own axis limits.
error = ax.text(
    x1min + 0.1,
    x2max - 0.6,
    train_loss_label(train_loss(Dtrain, loss, w_init)),
    size = 18,
    va = "top",
)
ax.set_xlabel("$x_1$", size=20)
ax.set_xlim((x1min, x1max))
ax.set_ylabel("$x_2$", size=20)
ax.set_ylim((x2min, x2max))
# Hide the header of the widget canvas.
fig.canvas.header_visible = False

def update_plot(w1, w2):
    """
    Redraw the model and the loss readout for the weights (w1, w2).

    Moves the three arrows in the global list lines, outlines in red every
    training point whose prediction differs from its label (outline width is
    -10 times the margin), and refreshes the model and training-loss labels.
    """
    # The two opposite directions perpendicular to (w1, w2).
    v1 = -w2
    v2 = +w1
    u1 = w2
    u2 = -w1
    lines[0].set_data(x = 0, y = 0, dx = w1, dy = w2)
    lines[1].set_data(x = 0, y = 0, dx = s*v1, dy = s*v2)
    lines[2].set_data(x = 0, y = 0, dx = s*u1, dy = s*u2)
    ecols = []
    ewidths = []
    for x, y in Dtrain:
        yest = predict([w1, w2], x)
        marg = margin(x, y, [w1, w2])
        ecols.append("red" if yest != y else "none")
        # A wrong prediction has margin <= 0, so -10*marg is a non-negative width.
        ewidths.append(-10*marg if yest != y else 0.0)
    dots.set_edgecolors(ecols)
    dots.set_linewidth(ewidths)
    # Keep the displayed formula in sync with the weights being drawn.
    model.set_text(model_label(w1, w2))
    tloss = train_loss(Dtrain, loss, [w1, w2])
    error.set_text(train_loss_label(tloss))
    fig.canvas.draw()
    fig.canvas.flush_events()

def update_init(w1, w2, eta):
    """
    Slider callback for the starting weights and the step size.

    Recomputes the gradient-descent trajectory (100 updates) from [w1, w2] with
    step size eta, stores it in the globals ws and tls, resets the first child
    of widget2 to 0 and redraws the plot at the starting weights.
    """
    global ws, tls
    ws, tls = gd_iter(Dtrain, loss, grad, [w1, w2], eta, 100)
    epoch_slider = widget2.children[0]
    epoch_slider.value = 0
    start = ws[0]
    update_plot(start[0], start[1])

def update_epoch(epoch):
    """
    Epoch-slider callback: show the weights reached after `epoch` updates.

    Reads the weight history from the global list ws; the training loss shown
    on the plot is recomputed by update_plot.
    """
    w1, w2 = ws[epoch]
    update_plot(w1, w2)

# Sliders for the starting weights and the step size; interactive() wires them
# to update_init(w1, w2, eta).
widget1 = interactive(
    update_init,
    w1 = FloatSlider(
        orientation = "horizontal",
        description = "w1 init",
              value = w1_init,
                min = w1_min,
                max = w1_max,
               step = w1_step,
             layout = Layout(width='90%')
    ),
    w2 = FloatSlider(
        orientation = "horizontal",
        description = "w2 init",
              value = w2_init,
                min = w2_min,
                max = w2_max,
               step = w2_step,
             layout = Layout(width='90%')
    ),
    eta = FloatSlider(
        orientation = "horizontal",
        description = "eta",
              value = eta_init,
                min = eta_min,
                max = eta_max,
               step = eta_step,
             layout = Layout(width='90%'),
    ),
)

# Slider selecting which epoch of the precomputed trajectory is shown;
# update_init resets it through widget2.children[0].
widget2 = interactive(
    update_epoch,
    epoch = IntSlider(
        orientation = "horizontal",
        description = "epoch",
              value = epoch_init, 
                min = epoch_min, 
                max = epoch_max, 
               step = epoch_step,
             layout = Layout(width='90%'),
    ),
)

# Show both slider groups first and the figure canvas below them.
display(widget1)
display(widget2)
display(fig.canvas)